Evaluation of RISK survival models

This document highlights the use of the FRESA.CAD package for the evaluation (RRPlot) and calibration of Cox models (CoxRiskCalibration) or logistic models (CalibrationProbPoissonRisk) of survival data.

Furthermore, it can be used to evaluate any risk index that returns the probability of a future event on an external data set.

This document will use the survival::rotterdam, and survival::gbsg data-sets to train and predict the risk of cancer recurrence after surgery. Both Cox and Logistic models will be trained and evaluated.

Here are some sample plots returned by the evaluated functions:

The libraries

library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
# Preserve the current graphics settings so plots can be restored later
op <- par(no.readonly = TRUE)

# Configure pander table rendering for the whole document
pander::panderOptions("keep.trailing.zeros", TRUE)
pander::panderOptions("table.split.table", 400)
pander::panderOptions("digits", 3)

Breast Cancer Royston-Altman data

data(gbsg, package="survival") and data(rotterdam, package="survival")

## Test cohort (gbsg): use the patient id as row name, then drop it
gbsgdata <- gbsg
rownames(gbsgdata) <- gbsgdata$pid
gbsgdata$pid <- NULL

## Training cohort (rotterdam): harmonize the outcome column names with
## gbsg (rtime/recur -> rfstime/status)
odata <-rotterdam
rownames(odata) <- odata$pid
odata$pid <- NULL
odata$rfstime <- odata$rtime
odata$status <- odata$recur
odata$rtime <- NULL
odata$recur <- NULL

## Keep only the covariates shared by both cohorts
odata <- odata[,colnames(odata) %in% colnames(gbsgdata)]

## rotterdam codes tumor size as a category; map each category to an
## approximate size so it matches the numeric gbsg$size (mm)
odata$size <- 10*(odata$size=="<=20") + 
  35*(odata$size=="20-50") + 
  60*(odata$size==">50")

## Design matrix with all pairwise interactions (~.*.)
data <- as.data.frame(model.matrix(Surv(rfstime,status)~.*.,odata))

data$`(Intercept)` <- NULL

## Re-attach the outcome columns; indexing by rownames(data) guards
## against rows dropped by model.matrix (e.g. incomplete cases)
dataBrestCancerTrain <- cbind(time=odata[rownames(data),"rfstime"],status=odata[rownames(data),"status"],data)

## Sanitize the generated column names (":", " ", ".", "-", ">") so they
## are safe to use in model formulas
colnames(dataBrestCancerTrain) <-str_replace_all(colnames(dataBrestCancerTrain),":","_")
colnames(dataBrestCancerTrain) <-str_replace_all(colnames(dataBrestCancerTrain)," ","")
colnames(dataBrestCancerTrain) <-str_replace_all(colnames(dataBrestCancerTrain),"\\.","_")
colnames(dataBrestCancerTrain) <-str_replace_all(colnames(dataBrestCancerTrain),"-","_")
colnames(dataBrestCancerTrain) <-str_replace_all(colnames(dataBrestCancerTrain),">","_")
dataBrestCancerTrain$time <- dataBrestCancerTrain$time/365 ## To years


## Event distribution of the training cohort
pander::pander(table(odata[rownames(data),"status"]),caption="rotterdam")
rotterdam
0 1
1464 1518

data(gbsg, package="survival") data conditioning

## Condition the gbsg test cohort exactly like the training cohort:
## same columns, same interaction design matrix, same name sanitation
gbsgdata <- gbsgdata[,colnames(odata)]
data <- as.data.frame(model.matrix(Surv(rfstime,status)~.*.,gbsgdata))

data$`(Intercept)` <- NULL

dataBrestCancerTest <- cbind(time=gbsgdata[rownames(data),"rfstime"],status=gbsgdata[rownames(data),"status"],data)

colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest),":","_")
colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest)," ","")
colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest),"\\.","_")
colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest),"-","_")
colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest),">","_")
dataBrestCancerTest$time <- dataBrestCancerTest$time/365 ## To years

## FIX: tabulate the gbsg outcomes. The original indexed `odata` (the
## rotterdam cohort) with gbsg row names, so the printed counts came
## from the wrong data set; the table below must be regenerated.
pander::pander(table(gbsgdata[rownames(data),"status"]), caption="gbsg")
gbsg
0 1
499 183

Cox Modeling

## Fit a BSWiMS Cox model (FRESA.CAD) on the training set:
## one bagging loop with 5 repeated feature-selection runs
ml <- BSWiMS.model(Surv(time,status)~.,data=dataBrestCancerTrain,loops=1,NumberofRepeats = 5)

—–.

## Summary of the selected coefficients and their CV statistics
sm <- summary(ml)
pander::pander(sm$coefficients)
  Estimate lower HR upper u.Accuracy r.Accuracy full.Accuracy u.AUC r.AUC full.AUC IDI NRI z.IDI z.NRI Delta.AUC Frequency
age_nodes 7.76e-04 1.001 1.001 1.001 0.626 0.601 0.633 0.630 0.602 0.634 0.028330 0.42972 12.26 13.415 3.21e-02 1.0
size_grade 5.02e-03 1.005 1.005 1.006 0.598 0.624 0.633 0.599 0.627 0.635 0.017876 0.37163 9.49 10.720 7.67e-03 1.0
nodes 8.25e-02 1.077 1.086 1.095 0.637 0.642 0.643 0.640 0.644 0.644 0.007344 0.06897 8.25 2.000 6.04e-05 1.0
size 6.77e-03 1.005 1.007 1.009 0.595 0.641 0.643 0.595 0.642 0.644 0.013742 0.33845 7.79 9.406 1.39e-03 1.0
size_nodes -3.61e-04 1.000 1.000 1.000 0.624 0.643 0.643 0.629 0.644 0.644 0.003445 0.34249 7.24 9.559 -3.56e-04 1.0
age_pgr -4.02e-06 1.000 1.000 1.000 0.548 0.631 0.635 0.544 0.634 0.637 0.009210 0.18985 6.33 5.482 3.16e-03 0.2
grade 1.97e-01 1.138 1.218 1.303 0.565 0.638 0.643 0.561 0.639 0.644 0.009051 0.20188 5.83 6.161 5.02e-03 1.0
age_size -1.23e-04 1.000 1.000 1.000 0.567 0.629 0.633 0.568 0.632 0.634 0.005990 0.18848 5.63 5.214 2.53e-03 1.0
age -2.97e-03 0.996 0.997 0.998 0.513 0.628 0.643 0.513 0.628 0.644 0.004165 0.09238 5.27 2.525 1.55e-02 1.0
grade_nodes -1.31e-02 0.982 0.987 0.992 0.635 0.645 0.643 0.639 0.646 0.644 0.002028 -0.09046 4.95 -2.532 -2.68e-03 1.0
grade_pgr 4.96e-05 1.000 1.000 1.000 0.541 0.633 0.635 0.537 0.636 0.637 0.004752 0.26528 4.83 7.348 1.01e-03 0.2
size_pgr 1.22e-06 1.000 1.000 1.000 0.490 0.633 0.635 0.494 0.635 0.637 0.002102 0.01627 3.82 0.452 1.62e-03 0.2
meno_nodes -2.80e-03 0.996 0.997 0.999 0.580 0.635 0.635 0.584 0.637 0.637 0.001735 0.00739 3.49 0.205 -5.37e-04 0.2
meno_pgr 7.13e-05 1.000 1.000 1.000 0.527 0.634 0.635 0.522 0.636 0.637 0.002519 0.06788 3.30 1.862 4.64e-04 0.2
age_grade -8.79e-05 1.000 1.000 1.000 0.508 0.632 0.635 0.509 0.634 0.637 0.000891 0.07139 2.42 1.951 2.66e-03 0.2

Cox Model Performance

Here we evaluate the model using the RRPlot() function.

The evaluation of the raw Cox model with RRPlot()

Here we will use the predicted event probability assuming a baseline hazard for events within 5 years

timeinterval <- 5 # Five years

## Crude baseline event rate: events observed within the interval over
## the subjects whose 5-year status is known (followed past 5 years or
## had an event)
h0 <- sum(dataBrestCancerTrain$status & dataBrestCancerTrain$time <= timeinterval)
h0 <- h0/sum((dataBrestCancerTrain$time > timeinterval) | (dataBrestCancerTrain$status==1))

pander::pander(t(c(h0=h0,timeinterval=timeinterval)),caption="Initial Parameters")
Initial Parameters
h0 timeinterval
0.429 5
## Cox linear predictor on the training data
index <- predict(ml,dataBrestCancerTrain)
## ppoisGzero (FRESA.CAD) converts the index into an event probability
## under the baseline rate h0; pair it with the observed event indicator
rdata <- cbind(dataBrestCancerTrain$status,ppoisGzero(index,h0))

## RR analysis with risk categories at the 0.90 and 0.80 rates
rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataBrestCancerTrain$time,
                     title="Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

By Risk Categories

## Observed vs expected events per risk category
obsexp <- rrAnalysisTrain$OERatio$atThrEstimates
pander::pander(obsexp)
  Observed L.CI H.CI Expected pvalue
Total 1518 1443 1596 1464 0.15818
low 811 756 869 854 0.15050
90% 250 220 283 210 0.00631
80% 457 416 501 398 0.00334
## Axis limits with a 10% margin on either side
maxx <- 1.1*max(c(obsexp$Observed,obsexp$Expected))
minx <- 0.9*min(c(obsexp$Observed,obsexp$Expected))

plot(obsexp$Expected,obsexp$Observed,
     xlim=c(minx,maxx),
     ylim=c(minx,maxx),
     main="Expected vs Observed",
     ylab="Observed",
     xlab="Expected",
     col=rainbow(nrow(obsexp)),
     log="xy")

## NOTE(review): Hmisc::errbar() takes (x, y, yplus, yminus); L.CI/H.CI
## are passed as (yplus, yminus) here — visually equivalent, but confirm.
errbar(obsexp$Expected,obsexp$Observed,obsexp$L.CI,obsexp$H.CI,add=TRUE,pch=0,errbar.col=rainbow(nrow(obsexp)),cex=0.75)
lines(x=c(1,maxx),y=c(1,maxx),lty=2) # identity (perfect calibration) line
text(obsexp$Expected,obsexp$Observed,rownames(obsexp),pos=2,cex=0.75)

The time vs. event counts are not calibrated. Let's do the calibration.

Uncalibrated Performance Report

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.457 0.386 0.345 0.215 1.88e-01 0.4997
RR 1.696 1.723 1.787 2.542 7.67e+01 1.6980
RR_LCI 1.591 1.612 1.662 1.967 1.59e-01 1.5924
RR_UCI 1.807 1.841 1.921 3.285 3.70e+04 1.8107
SEN 0.301 0.466 0.590 0.969 1.00e+00 0.2411
SPE 0.900 0.798 0.704 0.121 1.02e-02 0.9290
BACC 0.600 0.632 0.647 0.545 5.05e-01 0.5850
NetBenefit 0.112 0.175 0.223 0.375 3.96e-01 0.0879
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.04 0.985 1.09 0.158
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.14 1.14 1.14 1.15
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.32 1.32 1.32 1.32
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.677 0.677 0.664 0.69
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.694 0.676 0.713
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.301 0.278 0.325
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.9 0.883 0.915
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.457 0.386
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 477.549428 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 1980 811 1143 96.4 395.1
class=1 398 250 179 28.5 32.4
class=2 604 457 197 345.3 401.9

Cox Calibration

op <- par(no.readonly = TRUE)


## Calibrate the Cox model's event probabilities on the training data
## (returns h0, the hazard gain, and the calibrated time interval)
calprob <- CoxRiskCalibration(ml,dataBrestCancerTrain,"status","time")

( 7.208961 , 29202.06 , 1.076428 , 1518 , 1803.695 )

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.711 1.37 7.21

The RRPlot() of the calibrated model

## Calibrated probabilities paired with the observed event indicator
rcaldata <- cbind(dataBrestCancerTrain$status,calprob$prob)


## Same RR analysis, now at the calibrated time interval
rrAnalysisCalTrain <- RRPlot(rcaldata,atRate=c(0.90,0.80),
                     timetoEvent=dataBrestCancerTrain$time,
                     title="Cal. Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=calprob$timeInterval)

By Risk Categories

## Observed vs expected events per risk category (calibrated model)
obsexp <- rrAnalysisCalTrain$OERatio$atThrEstimates
pander::pander(obsexp)
  Observed L.CI H.CI Expected pvalue
Total 1518 1443 1596 1682 4.91e-05
low 811 756 869 981 2.71e-08
90% 250 220 283 241 5.40e-01
80% 457 416 501 457 9.81e-01
## Axis limits with a 10% margin on either side
maxx <- 1.1*max(c(obsexp$Observed,obsexp$Expected))
minx <- 0.9*min(c(obsexp$Observed,obsexp$Expected))

plot(obsexp$Expected,obsexp$Observed,
     xlim=c(minx,maxx),
     ylim=c(minx,maxx),
     main="Cal. Expected vs Observed",
     ylab="Observed",
     xlab="Expected",
     col=rainbow(nrow(obsexp)),
     log="xy")

errbar(obsexp$Expected,obsexp$Observed,obsexp$L.CI,obsexp$H.CI,add=TRUE,pch=0,errbar.col=rainbow(nrow(obsexp)),cex=0.75)
lines(x=c(1,maxx),y=c(1,maxx),lty=2) # identity (perfect calibration) line
text(obsexp$Expected,obsexp$Observed,rownames(obsexp),pos=2,cex=0.75)

Calibrated Train Performance

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.457 0.386 0.345 0.215 1.88e-01 0.4997
RR 1.696 1.723 1.787 2.542 7.67e+01 1.6980
RR_LCI 1.591 1.612 1.662 1.967 1.59e-01 1.5924
RR_UCI 1.807 1.841 1.921 3.285 3.70e+04 1.8107
SEN 0.301 0.466 0.590 0.969 1.00e+00 0.2411
SPE 0.900 0.798 0.704 0.121 1.02e-02 0.9290
BACC 0.600 0.632 0.647 0.545 5.05e-01 0.5850
NetBenefit 0.112 0.175 0.223 0.375 3.96e-01 0.0879
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.04 0.985 1.09 0.158
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.14 1.14 1.14 1.15
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.32 1.32 1.32 1.32
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.677 0.677 0.664 0.69
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.694 0.676 0.713
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.301 0.278 0.325
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.9 0.883 0.915
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.457 0.386
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 477.549428 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 1980 811 1143 96.4 395.1
class=1 398 250 179 28.5 32.4
class=2 604 457 197 345.3 401.9

Performance on the external data set

## Cox linear predictor on the external gbsg cohort
index <- predict(ml,dataBrestCancerTest)
## Binary-prediction statistics (ROC plot) of the raw index
pp <- predictionStats_binary(cbind(dataBrestCancerTest$status,index),plotname="Breast Cancer")

par(op)


## Probabilities from the training-set baseline rate h0, evaluated with
## the probability thresholds learned on the training data (atThr)
prob <- ppoisGzero(index,h0)
rdata <- cbind(dataBrestCancerTest$status,prob)
rrCoxTestAnalysis <- RRPlot(rdata,atThr=rrAnalysisTrain$thr_atP,
                     timetoEvent=dataBrestCancerTest$time,
                     title="Test: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

par(op)

External Data Report

pander::pander(t(rrCoxTestAnalysis$keyPoints),caption="Threshold values")
Threshold values
  @:0.457 @:0.386 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.456 0.386 0.359 0.229 1.80e-01 0.4993
RR 1.831 1.625 1.679 3.108 2.20e+01 1.7296
RR_LCI 1.565 1.379 1.416 1.630 4.75e-02 1.4698
RR_UCI 2.141 1.915 1.990 5.925 1.02e+04 2.0354
SEN 0.344 0.465 0.548 0.973 1.00e+00 0.2742
SPE 0.871 0.742 0.680 0.119 1.29e-02 0.8941
BACC 0.608 0.603 0.614 0.546 5.06e-01 0.5842
NetBenefit 0.089 0.111 0.138 0.276 3.13e-01 0.0599
pander::pander(t(rrCoxTestAnalysis$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.43 1.27 1.6 6.09e-09
pander::pander(rrCoxTestAnalysis$c.index,caption="C. Index")
  • C Index: 0.666

  • Dxy: 0.331

  • S.D.: 0.031

  • n: 686

  • missing: 0

  • uncensored: 299

  • Relevant Pairs: 266144

  • Concordant: 177175

  • Uncertain: 203702

  • cstatCI:

    mean.C Index median lower upper
    0.666 0.666 0.634 0.694
pander::pander(t(rrCoxTestAnalysis$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.663 0.622 0.703
pander::pander((rrCoxTestAnalysis$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.341 0.288 0.398
pander::pander((rrCoxTestAnalysis$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.871 0.833 0.903
pander::pander(t(rrCoxTestAnalysis$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.457 0.386
pander::pander(rrCoxTestAnalysis$surdif,caption="Logrank test")
Logrank test Chisq = 85.405348 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 448 160 219.0 15.89 60.334
class=1 86 37 33.9 0.29 0.328
class=2 152 102 46.1 67.59 81.340

Calibrating the index on the test data

## Re-calibrate the Cox model directly on the test cohort
calprob <- CoxRiskCalibration(ml,dataBrestCancerTest,"status","time")

( 5.660902 , 3012.376 , 1.29501 , 299 , 328.2351 )

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.597 1.03 5.66
## RR analysis of the test-calibrated probabilities
rdata <- cbind(dataBrestCancerTest$status,calprob$prob)

rrAnalysis <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataBrestCancerTest$time,
                     title="Cal. Test: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=calprob$timeInterval)

After Calibration Report

pander::pander(t(rrAnalysis$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.630 0.5243 0.4613 0.304 2.42e-01 0.500
RR 1.741 1.7093 1.6788 3.108 2.20e+01 1.675
RR_LCI 1.479 1.4550 1.4164 1.630 4.75e-02 1.424
RR_UCI 2.048 2.0079 1.9898 5.925 1.02e+04 1.972
SEN 0.268 0.4181 0.5485 0.973 1.00e+00 0.452
SPE 0.899 0.7984 0.6796 0.119 1.29e-02 0.765
BACC 0.583 0.6083 0.6140 0.546 5.06e-01 0.608
NetBenefit 0.020 0.0569 0.0843 0.207 2.58e-01 0.064
pander::pander(t(rrAnalysis$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.17 1.04 1.31 0.0086
pander::pander(rrAnalysis$c.index,caption="C. Index")
  • C Index: 0.666

  • Dxy: 0.331

  • S.D.: 0.031

  • n: 686

  • missing: 0

  • uncensored: 299

  • Relevant Pairs: 266144

  • Concordant: 177175

  • Uncertain: 203702

  • cstatCI:

    mean.C Index median lower upper
    0.666 0.666 0.634 0.695
pander::pander(t(rrAnalysis$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.663 0.622 0.703
pander::pander((rrAnalysis$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.264 0.215 0.318
pander::pander((rrAnalysis$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.899 0.865 0.927
pander::pander(t(rrAnalysis$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.63 0.524
pander::pander(rrAnalysis$surdif,caption="Logrank test")
Logrank test Chisq = 80.478762 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 483 174 232.7 14.80 67.96
class=1 85 46 32.0 6.16 6.94
class=2 118 79 34.3 58.05 66.45

Logistic Model

Here we train a logistic model on the same data set

## Only label subjects that present an event within five years;
## censored subjects followed for less than five years are removed
## so the binary outcome is well defined

dataBrestCancerR <- subset(dataBrestCancerTrain, time>=5 | status==1)
dataBrestCancerR$status <- dataBrestCancerR$status * (dataBrestCancerR$time < 5)
dataBrestCancerR$time <- NULL

#ml <- BSWiMS.model(status~1,data=dataBrestCancerR,loops=20,NumberofRepeats = 5)
## BSWiMS logistic model on the 5-year binary outcome
mlog <- BSWiMS.model(status~1,data=dataBrestCancerR,loops=1,NumberofRepeats = 5)

—–..

## Summary of the selected coefficients and their CV statistics
sm <- summary(mlog)
pander::pander(sm$coefficients)
  Estimate lower OR upper u.Accuracy r.Accuracy full.Accuracy u.AUC r.AUC full.AUC IDI NRI z.IDI z.NRI Delta.AUC Frequency
size_nodes 1.05e-03 1.001 1.001 1.001 0.669 0.571 0.668 0.627 0.500 0.628 0.11233 0.63654 17.86 18.870 0.128490 1
nodes 4.33e-02 1.040 1.044 1.048 0.676 0.634 0.690 0.639 0.621 0.662 0.07110 0.57106 14.13 16.179 0.040494 1
grade_nodes 1.50e-02 1.014 1.015 1.016 0.682 0.637 0.686 0.649 0.624 0.655 0.06580 0.54866 13.66 15.650 0.031087 1
age_nodes 1.06e-03 1.001 1.001 1.001 0.678 0.653 0.686 0.642 0.621 0.657 0.03346 0.21312 9.39 5.710 0.035896 1
size_grade 1.75e-03 1.001 1.002 1.002 0.632 0.682 0.686 0.626 0.646 0.655 0.01787 0.29411 6.74 7.728 0.008648 1
age_size 8.73e-05 1.000 1.000 1.000 0.608 0.682 0.686 0.577 0.649 0.657 0.01534 0.29152 6.41 7.652 0.007600 1
grade 2.27e-01 1.168 1.254 1.347 0.571 0.683 0.690 0.500 0.653 0.662 0.01340 0.19036 6.20 4.983 0.008461 1
age_meno -6.04e-03 0.992 0.994 0.996 0.571 0.676 0.686 0.500 0.645 0.657 0.00782 0.08057 4.76 2.337 0.012065 1
age_pgr -5.42e-06 1.000 1.000 1.000 0.571 0.686 0.686 0.500 0.656 0.657 0.00512 0.00745 4.11 0.194 0.000417 1
age_grade -1.65e-03 0.997 0.998 0.999 0.574 0.690 0.690 0.507 0.661 0.662 0.00454 0.11372 3.60 2.960 0.000315 1
meno_grade 1.02e-01 1.045 1.107 1.173 0.571 0.683 0.686 0.500 0.652 0.657 0.00425 0.20428 3.47 5.343 0.004441 1
nodes_hormon -1.38e-02 0.979 0.986 0.994 0.587 0.688 0.686 0.526 0.658 0.655 0.00280 0.45522 3.44 12.150 -0.002853 1
size 3.94e-03 1.002 1.004 1.006 0.611 0.693 0.690 0.618 0.663 0.662 0.00507 0.21050 3.42 5.600 -0.001075 1
meno_pgr 3.19e-04 1.000 1.000 1.001 0.571 0.687 0.686 0.500 0.657 0.657 0.00316 0.05977 3.35 1.558 -0.000429 1
pgr -1.07e-04 1.000 1.000 1.000 0.571 0.689 0.686 0.500 0.659 0.655 0.00257 0.19759 2.64 5.745 -0.004123 1
meno_nodes -2.60e-02 0.955 0.974 0.994 0.640 0.686 0.686 0.595 0.656 0.657 0.00264 -0.06329 2.59 -1.645 0.000631 1
grade_pgr -3.51e-05 1.000 1.000 1.000 0.571 0.669 0.668 0.500 0.627 0.628 0.00241 0.17471 2.55 5.058 0.001252 1
meno_size 2.34e-03 1.000 1.002 1.004 0.604 0.691 0.690 0.578 0.663 0.662 0.00185 0.10227 2.43 2.662 -0.001378 1

Logistic Model Performance

op <- par(no.readonly = TRUE)


## Logistic-model probabilities on the full training set
cprob <- predict(mlog,dataBrestCancerTrain)

## RR analysis of the uncalibrated logistic probabilities, using the
## five-year horizon the model was trained for
rdata <- cbind(dataBrestCancerTrain$status,cprob)
rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataBrestCancerTrain$time,
                     title="Logistic Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=5.0)

par(op)

Training Report

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.542 0.431 0.394 0.255 0.130969 0.500
RR 1.765 1.739 1.799 2.213 1.000000 1.773
RR_LCI 1.659 1.627 1.676 1.764 0.000000 1.665
RR_UCI 1.879 1.858 1.931 2.777 0.000000 1.888
SEN 0.327 0.470 0.566 0.962 1.000000 0.374
SPE 0.900 0.799 0.731 0.125 0.000683 0.874
BACC 0.613 0.635 0.648 0.543 0.500342 0.624
NetBenefit 0.108 0.165 0.202 0.342 0.435125 0.129
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.817 0.776 0.859 3.78e-16
pander::pander(rrAnalysisTrain$c.index,caption="C. Index")
  • C Index: 0.68

  • Dxy: 0.36

  • S.D.: 0.014

  • n: 2982

  • missing: 0

  • uncensored: 1518

  • Relevant Pairs: 6184528

  • Concordant: 4206600

  • Uncertain: 2703838

  • cstatCI:

    mean.C Index median lower upper
    0.68 0.68 0.667 0.694
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.695 0.677 0.714
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.327 0.303 0.351
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.9 0.883 0.915
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.541 0.431
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 541.976716 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 1974 804 1144 100.9 415.3
class=1 365 218 170 13.4 15.1
class=2 643 496 204 418.2 490.7

Results on the validation set using Logistic model

## Logistic probabilities on gbsg, thresholded with the training cutoffs
pre <- predict(mlog,dataBrestCancerTest)
rdata <- cbind(dataBrestCancerTest$status,pre)

rrAnalysis <- RRPlot(rdata,atThr=rrAnalysisTrain$thr_atP,
                     timetoEvent=dataBrestCancerTest$time,
                     title="Logistic Test: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=5)

par(op)

Validation Report

pander::pander(t(rrAnalysis$keyPoints),caption="Threshold values")
Threshold values
  @:0.541 @:0.431 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.542 0.431 0.439 0.306 2.31e-01 0.4996
RR 1.792 1.702 1.756 2.678 2.20e+01 1.7318
RR_LCI 1.529 1.428 1.477 1.679 4.75e-02 1.4731
RR_UCI 2.100 2.029 2.088 4.271 1.02e+04 2.0360
SEN 0.395 0.595 0.579 0.950 1.00e+00 0.4482
SPE 0.832 0.638 0.669 0.181 1.29e-02 0.7804
BACC 0.613 0.617 0.624 0.565 5.06e-01 0.6143
NetBenefit 0.060 0.105 0.106 0.210 2.69e-01 0.0717
pander::pander(t(rrAnalysis$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.03 0.921 1.16 0.556
pander::pander(rrAnalysis$c.index,caption="C. Index")
  • C Index: 0.669

  • Dxy: 0.338

  • S.D.: 0.0309

  • n: 686

  • missing: 0

  • uncensored: 299

  • Relevant Pairs: 266144

  • Concordant: 178115

  • Uncertain: 203702

  • cstatCI:

    mean.C Index median lower upper
    0.669 0.67 0.638 0.698
pander::pander(t(rrAnalysis$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.669 0.628 0.709
pander::pander((rrAnalysis$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.395 0.339 0.453
pander::pander((rrAnalysis$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.832 0.791 0.868
pander::pander(t(rrAnalysis$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.541 0.431
pander::pander(rrAnalysis$surdif,caption="Logrank test")
Logrank test Chisq = 92.507991 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 369 121 181.7 20.2997 52.3868
class=1 134 60 61.7 0.0479 0.0604
class=2 183 118 55.5 70.2342 88.0195

Logistic Model Poisson Calibration

## Poisson calibration of the logistic probabilities; the input columns
## are (status, predicted probability, time to event)
riskdata <- cbind(dataBrestCancerTrain$status,predict(mlog,dataBrestCancerTrain,type="prob"),dataBrestCancerTrain$time)
calprob <- CalibrationProbPoissonRisk(riskdata)

( 7.480401 , 29202.06 , 1.116959 , 1518 , 1838.456 )

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Logistic Calibration Parameters")
h0 Gain DeltaTime
0.689 1.33 7.48
## Keep the calibrated time interval and hazard gain for the test set
timeinterval <- calprob$timeInterval;
gain <- calprob$hazardGain

## RR analysis of the calibrated logistic probabilities
rdata <- cbind(dataBrestCancerTrain$status,calprob$prob)


rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataBrestCancerTrain$time,
                     title="Cal. Logistic Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

par(op)

Report of the calibrated logistic: training

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.6463 0.528 0.486 0.324 0.170289 0.500
RR 1.7654 1.739 1.799 2.213 1.000000 1.764
RR_LCI 1.6587 1.627 1.676 1.764 0.000000 1.648
RR_UCI 1.8790 1.858 1.931 2.777 0.000000 1.889
SEN 0.3267 0.470 0.566 0.962 1.000000 0.519
SPE 0.8996 0.799 0.731 0.125 0.000683 0.765
BACC 0.6132 0.635 0.648 0.543 0.500342 0.642
NetBenefit 0.0763 0.129 0.163 0.284 0.408374 0.149
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.919 0.873 0.966 0.000857
pander::pander(rrAnalysisTrain$c.index,caption="C. Index")
  • C Index: 0.68

  • Dxy: 0.36

  • S.D.: 0.014

  • n: 2982

  • missing: 0

  • uncensored: 1518

  • Relevant Pairs: 6184528

  • Concordant: 4206587

  • Uncertain: 2703838

  • cstatCI:

    mean.C Index median lower upper
    0.68 0.68 0.666 0.694
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.695 0.677 0.714
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.327 0.303 0.351
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.9 0.883 0.915
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.645 0.528
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 541.976716 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 1974 804 1144 100.9 415.3
class=1 365 218 170 13.4 15.1
class=2 643 496 204 418.2 490.7
## Apply the training-derived hazard gain to the test probabilities
probLog <- predict(mlog,dataBrestCancerTest)
aprob <- adjustProb(probLog,gain)

## Evaluate with the calibrated training thresholds and time interval
rdata <- cbind(dataBrestCancerTest$status,aprob)
rrAnalysisTestLogistic <- RRPlot(rdata,atThr=rrAnalysisTrain$thr_atP,
                     timetoEvent=dataBrestCancerTest$time,
                     title="Cal. Logistic Test: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

par(op)

Report of the calibrated validation

pander::pander(t(rrAnalysisTestLogistic$keyPoints),caption="Threshold values")
Threshold values
  @:0.645 @:0.528 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.645672 0.5277 0.5360 0.385 2.94e-01 0.5001
RR 1.791927 1.7024 1.7562 2.678 2.20e+01 1.7232
RR_LCI 1.529135 1.4283 1.4771 1.679 4.75e-02 1.4313
RR_UCI 2.099881 2.0290 2.0880 4.271 1.02e+04 2.0746
SEN 0.394649 0.5953 0.5786 0.950 1.00e+00 0.6555
SPE 0.832041 0.6382 0.6693 0.181 1.29e-02 0.5762
BACC 0.613345 0.6168 0.6239 0.565 5.06e-01 0.6159
NetBenefit -0.000601 0.0315 0.0367 0.125 2.04e-01 0.0466
pander::pander(t(rrAnalysisTestLogistic$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.16 1.04 1.3 0.0105
pander::pander(rrAnalysisTestLogistic$c.index,caption="C. Index")
  • C Index: 0.669

  • Dxy: 0.338

  • S.D.: 0.0309

  • n: 686

  • missing: 0

  • uncensored: 299

  • Relevant Pairs: 266144

  • Concordant: 178115

  • Uncertain: 203702

  • cstatCI:

    mean.C Index median lower upper
    0.669 0.669 0.638 0.699
pander::pander(t(rrAnalysisTestLogistic$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.669 0.628 0.709
pander::pander((rrAnalysisTestLogistic$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.395 0.339 0.453
pander::pander((rrAnalysisTestLogistic$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.832 0.791 0.868
pander::pander(t(rrAnalysisTestLogistic$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.645 0.528
pander::pander(rrAnalysisTestLogistic$surdif,caption="Logrank test")
Logrank test Chisq = 92.507991 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 369 121 181.7 20.2997 52.3868
class=1 134 60 61.7 0.0479 0.0604
class=2 183 118 55.5 70.2342 88.0195

Comparing the COX and Logistic Models on the Independent Data

## O/Acum and O/E summaries of both models on the external cohort
pander::pander(t(rrCoxTestAnalysis$OAcum95ci))
mean 50% 2.5% 97.5%
1.24 1.24 1.23 1.25
pander::pander(t(rrAnalysisTestLogistic$OAcum95ci))
mean 50% 2.5% 97.5%
0.945 0.944 0.941 0.948
pander::pander(t(rrCoxTestAnalysis$OE95ci))
mean 50% 2.5% 97.5%
1.21 1.21 1.17 1.24
pander::pander(t(rrAnalysisTestLogistic$OE95ci))
mean 50% 2.5% 97.5%
0.987 0.988 0.962 1.01
## Total observed events bound the axes of the calibration plots
maxobs <- sum(dataBrestCancerTest$status)

par(mfrow=c(1,2),cex=0.75)

## Cumulative predicted probability vs observed events
plot(rrCoxTestAnalysis$CumulativeOvs[,1:2],type="l",lty=1,
     main="Cumulative Probability",
     xlab="Observed",
     ylab="Cumulative Probability",
     ylim=c(0,maxobs),
     xlim=c(0,maxobs))
lines(rrAnalysisTestLogistic$CumulativeOvs[,1:2],lty=2,col="red")
lines(x=c(0,maxobs),y=c(0,maxobs),lty=3,col="gray") # ideal calibration
legend("topleft",legend = c("Cox","Logistic","Ideal"),
       col=c("black","red","gray"),
       lty=c(1,2,3),
       cex=0.75
)


## Difference between cumulative risk and observed events
plot(rrCoxTestAnalysis$CumulativeOvs$Observed,
     rrCoxTestAnalysis$CumulativeOvs$Cumulative-
       rrCoxTestAnalysis$CumulativeOvs$Observed,
     main="Cumulative Risk Difference",
     xlab="Observed",
     ylab="Cumulative Risk - Observed",
     type="l",
     lty=1)
lines(rrAnalysisTestLogistic$CumulativeOvs$Observed,
     rrAnalysisTestLogistic$CumulativeOvs$Cumulative-
       rrAnalysisTestLogistic$CumulativeOvs$Observed,
     lty=2,
     col="red")
legend("topleft",legend = c("Cox","Logistic"),
       col=c("black","red"),
       lty=c(1,2),
       cex=0.75
)

## Expected vs observed events over follow-up time
plot(rrCoxTestAnalysis$OEData[,2:3],type="l",lty=1,
     main="Expected over Time",
     xlab="Observed",
     ylab="Expected",
     ylim=c(0,maxobs),
     xlim=c(0,maxobs))
lines(rrAnalysisTestLogistic$OEData[,2:3],lty=2,col="red")
lines(x=c(0,maxobs),y=c(0,maxobs),lty=3,col="gray") # ideal calibration
legend("topleft",legend = c("Cox","Logistic","Ideal"),
       col=c("black","red","gray"),
       lty=c(1,2,3),
       cex=0.75
)

## Expected-minus-observed difference curves
plot(rrCoxTestAnalysis$OEData$Observed,
     rrCoxTestAnalysis$OEData$Expected-
       rrCoxTestAnalysis$OEData$Observed,
     main="Expected vs Observed Difference",
     xlab="Observed",
     ylab="Cumulative - Observed",
     type="l",
     lty=1)
lines(rrAnalysisTestLogistic$OEData$Observed,
     rrAnalysisTestLogistic$OEData$Expected-
       rrAnalysisTestLogistic$OEData$Observed,
     lty=2,col="red")

legend("bottomleft",legend = c("Cox","Logistic"),
       col=c("black","red"),
       lty=c(1,2),
       cex=0.75
)

## Restore the saved graphics parameters
par(op)